Last updated: 2017-03-28

Code version: 096c6e3

Cell type and annotation type

Overview

As you can see, some cell types have multiple annotations (either different annotations or different experimental treatments). DNase and CTCF have data from multiple cell types. Also, some TFs, such as Pol2, have data sets from the same cell type across multiple treatments.

library(ggplot2)
library(plyr)
# Load the per-dataset AUC table (tab-separated, first line is the header).
data <- read.table('../data/aucs.txt', sep = '\t', header = TRUE)
# Tag every row with its 1-based position; seq_len() stays empty for an empty
# table, whereas seq(0) would yield c(1, 0).
data$row_num <- seq_len(nrow(data))
# Overview: 2-D bin counts of (cell type, annotation type) combinations.
ggplot(data) +
  geom_bin2d(aes(x = Cell.Type, y = TF.DNase.HistoneMark)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 22, face = "bold")
  )

Cell type and annotation type pair that appears multiple times

This may be due to multiple experimental treatments, but some of them are duplicated experiments. The following lists all duplicated (cell type, annotation type) pairs.

# Group key shared by the three summaries: (cell type, annotation type).
pair_groups <- list(data$Cell.Type, data$TF.DNase.HistoneMark)
# Collapse all values of one group into a single ' <> '-separated string.
join_values <- function(values) {
  paste(values, collapse = ' <> ')
}

# Per pair: the treatments seen, how many rows there are, and which rows they are.
temp <- aggregate(data$Treatment, pair_groups, join_values)
temp2 <- aggregate(data$Treatment, pair_groups, length)
temp3 <- aggregate(data$row_num, pair_groups, join_values)

temp$num <- temp2$x
temp$row_num <- temp3$x
# Keep only the pairs that occur more than once.
temp[temp$num > 1, ]
          Group.1         Group.2                                                                                           x num                                              row_num
12           K562            ATF3                                                                                None <> None   2                                           334 <> 595
29          HepG2         BHLHE40                                                                                None <> None   2                                           297 <> 549
45  MCF10A-Er-Src           c-Fos                        EtOH_0.01pct <> 4OHTAM_1uM_12hr <> 4OHTAM_1uM_4hr <> 4OHTAM_1uM_36hr   4                             682 <> 683 <> 684 <> 685
50           K562           c-Jun                                                IFNa30 <> IFNa6h <> IFNg30 <> IFNg6h <> None   5                      606 <> 607 <> 608 <> 609 <> 610
52        H1-hESC           c-Myc                                                                                None <> None   2                                           473 <> 734
53        HeLa-S3           c-Myc                                                                                None <> None   2                                           508 <> 737
56           K562           c-Myc                                IFNa30 <> IFNa6h <> IFNg30 <> IFNg6h <> None <> None <> None   7        611 <> 612 <> 613 <> 614 <> 615 <> 616 <> 746
57          MCF-7           c-Myc                        estrogen <> serum_stimulated_media <> serum_starved_media <> vehicle   4                             749 <> 750 <> 751 <> 752
58  MCF10A-Er-Src           c-Myc                                                              EtOH_0.01pct <> 4OHTAM_1uM_4hr   2                                           686 <> 687
66          HepG2           CEBPB                                                                   None <> forskolin <> None   3                                    298 <> 551 <> 552
68           K562           CEBPB                                                                                None <> None   2                                           338 <> 603
71        H1-hESC            CHD1                                                                                None <> None   2                                           130 <> 470
81           K562          COREST                                                                                None <> None   2                                           617 <> 618
84           A549            CTCF                                                   DEX_100nM <> EtOH_0.02pct <> None <> None   4                             174 <> 175 <> 721 <> 764
106       GM12878            CTCF                                                                None <> None <> None <> None   4                             128 <> 416 <> 727 <> 782
112       H1-hESC            CTCF                                                                        None <> None <> None   3                                    131 <> 263 <> 735
122       HeLa-S3            CTCF                                                                        None <> None <> None   3                                    135 <> 738 <> 792
123         HepG2            CTCF                                                                None <> None <> None <> None   4                             138 <> 300 <> 741 <> 793
127          HMEC            CTCF                                                                                None <> None   2                                           140 <> 797
135         HUVEC            CTCF                                                                        None <> None <> None   3                                    146 <> 744 <> 803
138          K562            CTCF                                                        None <> None <> None <> None <> None   5                      150 <> 339 <> 619 <> 747 <> 805
139         MCF-7            CTCF        estrogen <> serum_stimulated_media <> serum_starved_media <> None <> vehicle <> None   6               753 <> 754 <> 755 <> 756 <> 757 <> 806
144          NHEK            CTCF                                                                        None <> None <> None   3                                    165 <> 761 <> 809
145          NHLF            CTCF                                                                                None <> None   2                                           168 <> 810
150    SK-N-SH_RA            CTCF                                                                                None <> None   2                                           386 <> 813
201       HeLa-S3           DNase                                                                              IFNa4h <> None   2                                             15 <> 56
238      Ishikawa           DNase                                                     Estradiol_100nM_1hr <> 4OHTAM_20nM_72hr   2                                             23 <> 24
241         LNCaP           DNase                                                                            androgen <> None   2                                             25 <> 63
242         MCF-7           DNase                                                                     Hypoxia_LacAcid <> None   2                                             26 <> 64
271     Urothelia           DNase                                                                               None <> UT189   2                                             39 <> 40
273         WI-38           DNase                                                                    4OHTAM_20nM_72hr <> None   2                                           124 <> 125
280          K562            E2F6                                                                                None <> None   2                                           341 <> 621
281       GM12878            EBF1                                                                                None <> None   2                                           212 <> 418
299         ECC-1         ERalpha                                              BPA_100nM <> Estradiol_10nM <> Genistein_100nM   3                                    199 <> 200 <> 201
300         T-47D         ERalpha                                              BPA_100nM <> Genistein_100nM <> Estradiol_10nM   3                                    394 <> 395 <> 396
325         HepG2           FOXA1                                                                                None <> None   2                                           303 <> 304
341          K562           GATA2                                                                                None <> None   2                                           347 <> 624
343         MCF-7           GATA3                                                                                None <> None   2                                           696 <> 697
346          A549              GR                                               DEX_500pM <> DEX_50nM <> DEX_5nM <> DEX_100nM   4                             181 <> 182 <> 183 <> 184
462          K562           HDAC2                                                                                None <> None   2                                           153 <> 348
465         HepG2           HNF4A                                                                           None <> forskolin   2                                           308 <> 558
471          K562            IRF1                                                        IFNa30 <> IFNa6h <> IFNg30 <> IFNg6h   4                             629 <> 630 <> 631 <> 632
477       H1-hESC            JunD                                                                                None <> None   2                                           268 <> 476
479         HepG2            JunD                                                                                None <> None   2                                           310 <> 561
488         HepG2            MafK                                                                                None <> None   2                                           563 <> 564
497          K562             Max                                                                                None <> None   2                                           349 <> 637
547         HepG2            NRSF                                                                                None <> None   2                                           314 <> 315
551       SK-N-SH            NRSF                                                                                None <> None   2                                           383 <> 384
554       GM12878            p300                                                                        None <> None <> None   3                                    225 <> 430 <> 431
557         HepG2            p300                                                                                None <> None   2                                           316 <> 569
558          K562            p300                                                                                None <> None   2                                           155 <> 645
571          A549            Pol2                                                           DEX_100nM <> EtOH_0.02pct <> None   3                                    187 <> 188 <> 722
575       GM12878            Pol2                                                                None <> None <> None <> None   4                             231 <> 432 <> 434 <> 728
576       GM12891            Pol2                                                                                None <> None   2                                           250 <> 452
577       GM12892            Pol2                                                                                None <> None   2                                           257 <> 454
584       H1-hESC            Pol2                                                                                None <> None   2                                           273 <> 736
587       HeLa-S3            Pol2                                                                        None <> None <> None   3                                    294 <> 529 <> 739
588         HepG2            Pol2                                                           None <> forskolin <> None <> None   4                             318 <> 571 <> 572 <> 742
589         HUVEC            Pol2                                                                        None <> None <> None   3                                    333 <> 587 <> 745
591          K562            Pol2                        None <> IFNa30 <> IFNa6h <> IFNg30 <> IFNg6h <> None <> None <> None   8 355 <> 646 <> 647 <> 648 <> 649 <> 650 <> 653 <> 748
592         MCF-7            Pol2                                       serum_stimulated_media <> serum_starved_media <> None   3                                    758 <> 759 <> 760
593 MCF10A-Er-Src            Pol2                                                             EtOH_0.01pct <> 4OHTAM_1uM_36hr   2                                           689 <> 690
618          K562 Pol2(phosphoS2)                                                                                None <> None   2                                           651 <> 652
629       GM12878           Rad21                                                                                None <> None   2                                           234 <> 436
630       H1-hESC           Rad21                                                                                None <> None   2                                           275 <> 481
632         HepG2           Rad21                                                                                None <> None   2                                           319 <> 574
634          K562           Rad21                                                                                None <> None   2                                           357 <> 655
650          K562          SETDB1                                                                              MNaseD <> None   2                                           658 <> 659
686          K562           STAT1                                                        IFNa30 <> IFNa6h <> IFNg30 <> IFNg6h   4                             662 <> 663 <> 664 <> 665
687          K562           STAT2                                                                            IFNa30 <> IFNa6h   2                                           666 <> 667
690 MCF10A-Er-Src           STAT3 EtOH_0.01pct_4hr <> EtOH_0.01pct_12hr <> EtOH_0.01pct <> 4OHTAM_1uM_12hr <> 4OHTAM_1uM_36hr   5                      691 <> 692 <> 693 <> 694 <> 695
709          K562           TBLR1                                                                                None <> None   2                                           669 <> 670
722       HeLa-S3          TCF7L2                                                                                None <> None   2                                           539 <> 540
739          A549            USF1                                                   DEX_100nM <> EtOH_0.02pct <> EtOH_0.02pct   3                                    193 <> 194 <> 195
752       GM12878             YY1                                                                                None <> None   2                                           245 <> 447
758          K562             YY1                                                                        None <> None <> None   3                                    370 <> 371 <> 677
781          K562          ZNF274                                                                                None <> None   2                                           680 <> 681

Same annotation type, different cell types

Remove all treatments other than “None” (namely ignore complexity from experimental treatment) and count the number of experiments we have per cell type and annotation type pair. Here, CTCF and DNase have many experiments across cell types.

# Keep only the rows whose treatment is recorded and equals 'None'.
is_untreated <- !is.na(data$Treatment) & data$Treatment == 'None'
data_none <- data[is_untreated, ]
# Same overview as before, restricted to untreated experiments.
ggplot(data_none) +
  geom_bin2d(aes(x = Cell.Type, y = TF.DNase.HistoneMark)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 22, face = "bold")
  )

Here we further explore the performance of DeepSEA on different cell types given a particular annotation type (only consider “None” treatment).

Performance distribution within a particular annotation type across cell types

Check out yanyu_lib.R here

# Helper functions (as_num, zoom_in, multiplot) live in yanyu_lib.R.
source('../../cell_type/yanyu_lib.R')
# Untreated CTCF experiments, with the DeepSEA AUC columns converted to numeric.
is_ctcf <- data_none$TF.DNase.HistoneMark == 'CTCF'
data_none_ctcf <- as_num(data_none[is_ctcf, ])
p1 <- ggplot(data_none_ctcf, aes(x = DeepSEA.ROC.AUC)) +
  geom_histogram(bins = 20) +
  ggtitle('ROC AUC in CTCF data sets')
p2 <- ggplot(data_none_ctcf, aes(x = DeepSEA.PR.AUC)) +
  geom_histogram(bins = 20) +
  ggtitle('PR AUC in CTCF data sets')
multiplot(p1, p2)

# Untreated DNase experiments, with the DeepSEA AUC columns converted to numeric.
is_dnase <- data_none$TF.DNase.HistoneMark == 'DNase'
data_none_dnase <- as_num(data_none[is_dnase, ])
p1 <- ggplot(data_none_dnase, aes(x = DeepSEA.ROC.AUC)) +
  geom_histogram(bins = 20) +
  ggtitle('ROC AUC in DNase data sets')
p2 <- ggplot(data_none_dnase, aes(x = DeepSEA.PR.AUC)) +
  geom_histogram(bins = 20) +
  ggtitle('PR AUC in DNase data sets')
multiplot(p1, p2)

As you can see, some cell types have lower accuracy than others.

Distance between datasets

For each data set pair, compute how many instances have the same labels and how many have different labels. This setup is done by the following scripts at /project2/xinhe/yanyul/deep_variant/yanyu/DeepSEA/ and test_all.h5 is generated from test.mat by /project2/xinhe/yanyul/deep_variant/yanyu/DanQ/my_train/scripts/generate_test_data.py. The following shows how /data/test_all.h5.y_disance.hdf5 is generated. Check out compute_distance.py here

$ python my_scripts/compute_distance.py ../DanQ/my_train/data/test_all.h5 my_test/

To compute the distance between two sets, Jaccard distance is used here (check definition here)

library(h5)
library(reshape2)
# Index ranges of the three annotation groups along each axis of the distance
# matrix (this information is derived from
# /DeepVariantAnnotation/data/nmeth.3547-S3.xlsx).
dnase_start <- 1
dnase_end <- 125
tf_start <- 126
tf_end <- 815
hist_start <- 816
hist_end <- 919

# Read each count matrix exactly once, then release the HDF5 handle instead of
# leaving the file open for the rest of the session.
f <- h5file('../data/test_all.h5.y_disance.hdf5', 'r')
one_one <- f['one_one'][]
zero_one <- f['zero_one'][]
one_zero <- f['one_zero'][]
h5close(f)

# Jaccard distance: 1 - (both labelled 1) / (at least one labelled 1).
distance <- 1 - one_one / (one_one + zero_one + one_zero)
melted_distance <- melt(distance)

# Red separator lines sit halfway between the last ID of one group and the
# first ID of the next.
group_breaks <- c((dnase_end + tf_start) / 2, (tf_end + hist_start) / 2)
ggplot(melted_distance) +
  geom_tile(aes(x = Var1, y = Var2, fill = value)) +
  scale_fill_gradient(low = "white", high = "black") +
  geom_vline(xintercept = group_breaks, color = 'red') +
  geom_hline(yintercept = group_breaks, color = 'red') +
  coord_fixed() +
  ggtitle('Distance between datasets \n (red line separate DNase/TF/Hist, southwest -> northeast)') +
  labs(x = 'dataset ID', y = 'dataset ID', fill = 'Jaccard \n distance')

The trend is that within-group distance is smaller than between-group distance.

CTCF zoom in

Let’s only take a look at instances with CTCF binding (treatment = ‘None’). Most of them have some similarity to other members, but there are two outliers (ID = 775, 815).

# Restrict the distance table to the untreated CTCF datasets; zoom_in() also
# returns the metadata with a per-dataset mean distance (dist.mean) attached.
out <- zoom_in(melted_distance, data_none_ctcf)
sub.ctcf <- out[["sub"]]
data_none_ctcf <- out[["ori"]]
# Heat map of pairwise distances inside the CTCF group, axes labelled by dataset ID.
ggplot(sub.ctcf, aes(x = as.factor(Var1), y = as.factor(Var2), fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  coord_fixed() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 5),
    legend.position = "bottom"
  )

The following shows the relationship between the within-group mean distance and ROC/PR AUC. As you can see, IDs 775 and 815 are outliers.

# Mean within-group distance against each AUC; every point is drawn as its dataset ID.
p1 <- ggplot(data_none_ctcf) +
  geom_text(aes(x = dist.mean, y = DeepSEA.ROC.AUC, label = row_num))
p2 <- ggplot(data_none_ctcf) +
  geom_text(aes(x = dist.mean, y = DeepSEA.PR.AUC, label = row_num))
multiplot(p1, p2, cols = 1)

For your interest, the following shows the cell types that either have the worst ROC/PR AUC or are far from the others.

Select the ones that are furthest from the others.

# Up to five CTCF datasets with the largest mean distance to the group.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_ctcf$row_num[
    head(order(data_none_ctcf$dist.mean, decreasing = TRUE), 5)
  ],
]
    Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
775   GM12801                 CTCF      None     0.996388547 0.9953458122        0.9970761167    0.262530792 0.3311145781       0.3156353128     775
815     WI-38                 CTCF      None     0.934407177 0.9415380569        0.9465819889   0.6887911683 0.7130838079       0.7255293238     815
796     HL-60                 CTCF      None      0.99534579 0.9964539538        0.9963882656   0.6719854829 0.7162271327       0.7213985799     796
735   H1-hESC                 CTCF      None     0.986843822 0.9908473757        0.9916254659   0.5461145512 0.6243978119       0.6397933306     735
756     MCF-7                 CTCF      None     0.979445883  0.984418652        0.9851506937   0.5992568532 0.6440205591        0.652074639     756

Select the ones that get the lowest ROC AUC scores.

# Up to five CTCF datasets with the lowest DeepSEA ROC AUC.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_ctcf$row_num[head(order(data_none_ctcf$DeepSEA.ROC.AUC), 5)],
]
    Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
815     WI-38                 CTCF      None     0.934407177 0.9415380569        0.9465819889   0.6887911683 0.7130838079       0.7255293238     815
135   HeLa-S3                 CTCF      None     0.964978887 0.9710963274        0.9741564603   0.6521248673 0.6899951195       0.7035668817     135
170   Osteobl                 CTCF      None      0.96530837 0.9720288862         0.974641306   0.6701567958 0.7027804547       0.7124233217     170
150      K562                 CTCF      None     0.970549293 0.9758263405        0.9778692369   0.6238341427 0.6608491882       0.6744054116     150
144  HSMMtube                 CTCF      None     0.972644397 0.9779363134        0.9807862107   0.6377985839 0.6742370664       0.6878329335     144

Select the ones that get the lowest PR AUC scores.

# Up to five CTCF datasets with the lowest DeepSEA PR AUC.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_ctcf$row_num[head(order(data_none_ctcf$DeepSEA.PR.AUC), 5)],
]
    Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
775   GM12801                 CTCF      None     0.996388547 0.9953458122        0.9970761167    0.262530792 0.3311145781       0.3156353128     775
732   GM19239                 CTCF      None     0.981206568 0.9854088462        0.9863621522   0.5228896013 0.5787069864       0.5869761929     732
735   H1-hESC                 CTCF      None     0.986843822 0.9908473757        0.9916254659   0.5461145512 0.6243978119       0.6397933306     735
416   GM12878                 CTCF      None     0.979011087 0.9840308561        0.9850540848   0.5499689112 0.6069305147       0.6184375267     416
730   GM12892                 CTCF      None     0.976850556 0.9838173802        0.9840161027   0.5557373624 0.6069473908       0.6099822526     730

DNase zoom in

The same analysis is done on DNase group (treatment = ‘None’).

# Restrict the distance table to the untreated DNase datasets; zoom_in() also
# returns the metadata with a per-dataset mean distance (dist.mean) attached.
out <- zoom_in(melted_distance, data_none_dnase)
sub.dnase <- out[["sub"]]
data_none_dnase <- out[["ori"]]
# Heat map of pairwise distances inside the DNase group, axes labelled by dataset ID.
ggplot(sub.dnase, aes(x = as.factor(Var1), y = as.factor(Var2), fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black") +
  coord_fixed() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 5),
    legend.position = "bottom"
  )

# Mean within-group distance against each AUC; every point is drawn as its dataset ID.
p1 <- ggplot(data_none_dnase) +
  geom_text(aes(x = dist.mean, y = DeepSEA.ROC.AUC, label = row_num))
p2 <- ggplot(data_none_dnase) +
  geom_text(aes(x = dist.mean, y = DeepSEA.PR.AUC, label = row_num))
multiplot(p1, p2, cols = 1)

Select the ones that are furthest from the others.

# Up to five DNase datasets with the largest mean distance to the group.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_dnase$row_num[
    head(order(data_none_dnase$dist.mean, decreasing = TRUE), 5)
  ],
]
      Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
49       Caco-2                DNase      None     0.955102255 0.9602247859        0.9614734632   0.4413681283 0.4660432973       0.4649057247      49
67      GM06990                DNase      None     0.902955171 0.9063258167        0.9130242555   0.3256947324 0.3396407621       0.3519833383      67
122         Th2                DNase      None     0.905005703 0.9103992578        0.9111198867   0.3293858306 0.3493359431       0.3472368336     122
121  SK-N-SH_RA                DNase      None     0.939063718 0.9438625773        0.9498986815   0.3975102061 0.4261801756       0.4398179731     121
16  Hepatocytes                DNase      None     0.881975064 0.8856014479        0.8869534689    0.285556575 0.3026511231       0.3029602655      16

Select the ones that get the lowest ROC AUC scores.

# Up to five DNase datasets with the lowest DeepSEA ROC AUC.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_dnase$row_num[head(order(data_none_dnase$DeepSEA.ROC.AUC), 5)],
]
   Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
5    Fibrobl                DNase      None     0.837919632 0.8407263803        0.8431350521   0.3407087383 0.3526279102       0.3572207165       5
28    Melano                DNase      None     0.847363066 0.8506737609        0.8531173679   0.3536066271 0.3662819206       0.3742151035      28
30   Osteobl                DNase      None     0.849303246 0.8538104396        0.8558780011   0.3503940085  0.363640452        0.367974622      30
27   Medullo                DNase      None     0.864042242 0.8664238813        0.8692186275   0.3158031211 0.3271526972       0.3335229698      27
66       Th1                DNase      None     0.865216236 0.8684588339        0.8701070274   0.3582683957 0.3696926502       0.3746784214      66

Select the ones that get the lowest PR AUC scores.

# Up to five DNase datasets with the lowest DeepSEA PR AUC.
# head() returns fewer rows for a small group, where `[1:5]` would pad with NA.
data[
  data_none_dnase$row_num[head(order(data_none_dnase$DeepSEA.PR.AUC), 5)],
]
     Cell.Type TF.DNase.HistoneMark Treatment DeepSEA.ROC.AUC DanQ.ROC.AUC DanQ.JASPAR.ROC.AUC DeepSEA.PR.AUC  DanQ.PR.AUC DanQ.JASPAR.PR.AUC row_num
16 Hepatocytes                DNase      None     0.881975064 0.8856014479        0.8869534689    0.285556575 0.3026511231       0.3029602655      16
74       HA-sp                DNase      None     0.867568074 0.8749524879          0.87873071    0.310400002 0.3207391676       0.3324358266      74
27     Medullo                DNase      None     0.864042242 0.8664238813        0.8692186275   0.3158031211 0.3271526972       0.3335229698      27
3      Chorion                DNase      None     0.896293207 0.9002669789        0.9008547544   0.3207430835 0.3384839394       0.3367010903       3
67     GM06990                DNase      None     0.902955171 0.9063258167        0.9130242555   0.3256947324 0.3396407621       0.3519833383      67

Codes

yanyu_lib.R

# Restrict a melted pairwise-distance table to one group of datasets and attach
# each dataset's mean distance to the group.
#
# melted_distance: data frame with columns Var1, Var2 (dataset IDs) and value.
# data_none_ctcf:  metadata for the group of interest; must contain row_num
#                  (dataset ID) plus DeepSEA.ROC.AUC and DeepSEA.PR.AUC.
#
# Returns list(sub = ..., ori = ...):
#   sub - rows of melted_distance whose Var1 and Var2 both belong to the group,
#         with Var1.sub / Var2.sub giving each ID's rank among the group's
#         sorted IDs;
#   ori - data_none_ctcf with both AUC columns converted to numeric and a
#         dist.mean column (mean of `value` over the rows where Var1 equals
#         that dataset's ID, self-distance included; NaN if no such rows).
zoom_in <- function(melted_distance, data_none_ctcf){
  group_ids <- sort(unique(data_none_ctcf$row_num))
  in_group <- melted_distance$Var1 %in% group_ids &
    melted_distance$Var2 %in% group_ids
  sub_dist <- melted_distance[in_group, ]

  # Look the rank up by ID instead of assuming the subset is a complete,
  # column-major n x n grid.
  sub_dist$Var1.sub <- match(sub_dist$Var1, group_ids)
  sub_dist$Var2.sub <- match(sub_dist$Var2, group_ids)

  data_none_ctcf$DeepSEA.ROC.AUC <- as.numeric(as.character(data_none_ctcf$DeepSEA.ROC.AUC))
  data_none_ctcf$DeepSEA.PR.AUC <- as.numeric(as.character(data_none_ctcf$DeepSEA.PR.AUC))

  # Compute the mean per metadata row so it lines up with that row's ID even
  # when the metadata is not sorted by row_num; also works for an empty group.
  data_none_ctcf$dist.mean <- vapply(
    data_none_ctcf$row_num,
    function(id) mean(sub_dist$value[sub_dist$Var1 == id]),
    numeric(1)
  )

  list(sub = sub_dist, ori = data_none_ctcf)
}

# Convert the two DeepSEA AUC columns of `x` to numeric, going through
# character first so that factor columns yield their labels rather than their
# integer codes. Returns `x` with those two columns replaced.
as_num <- function(x){
  to_numeric <- function(column) {
    as.numeric(as.character(column))
  }
  x$DeepSEA.ROC.AUC <- to_numeric(x$DeepSEA.ROC.AUC)
  x$DeepSEA.PR.AUC <- to_numeric(x$DeepSEA.PR.AUC)
  x
}

## derived from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
# Draw several plots on one page using a grid layout.
#
# ...:      plot objects, passed individually.
# plotlist: optional list of further plot objects, appended after `...`.
# file:     accepted for compatibility; not used by this function.
# cols:     number of columns in the layout (ignored when `layout` is given).
# layout:   optional matrix of plot indices; the cell(s) holding value i mark
#           where plot i is drawn. Built from `cols` when NULL.
#
# With no plots at all the function returns invisible NULL without drawing.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  n_plots <- length(plots)

  # Nothing to draw: previously this fell through to `1:0` and failed on
  # plots[[1]].
  if (n_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Rows needed to fit all plots into `cols` columns.
    n_rows <- ceiling(n_plots / cols)
    layout <- matrix(seq(1, cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (n_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(n_plots)) {
      # Row/column positions of the layout cells assigned to plot i
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = cell$row,
                                      layout.pos.col = cell$col))
    }
  }
}
## end

compute_distance.py

# Pairwise label co-occurrence counts between datasets.
#
# Loads (X, y) with load_standard_hdf5_data and writes four matrix products to
# <output_dir>/<input file name>.y_disance.hdf5:
#   one_one   = y.T . y
#   zero_zero = (1 - y.T) . (1 - y)
#   zero_one  = (1 - y.T) . y
#   one_zero  = y.T . (1 - y)
# NOTE(review): presumably y is an (instances x datasets) 0/1 label matrix, so
# each entry counts instances per dataset pair -- confirm against
# load_standard_hdf5_data.
import sys

USAGE = 'compute_distance.py [input_data_hdf5] [output_dir]'
if len(sys.argv) > 1 and sys.argv[1] == '--help':
    print(USAGE)
    sys.exit()
if len(sys.argv) < 3:
    # A missing argument used to surface as a bare IndexError; report the
    # usage on stderr and exit with a non-zero status instead.
    sys.exit('usage: ' + USAGE)

import h5py

sys.path.insert(0, '/project2/xinhe/yanyul/deep_variant/yanyu/DanQ/my_scripts')
from keras_mylib import my_load_model, load_standard_hdf5_data, my_build_model
import numpy as np
import ntpath
import os
import theano
import theano.tensor as T
from theano import function

print('Build computational graph in theano')
# Symbolic operands get their own Python names so that the data array `y`
# loaded below does not rebind one of them.
lhs = T.dmatrix('x')
rhs = T.dmatrix('y')
dot = function([lhs, rhs], theano.tensor.dot(lhs, rhs))

print('Loading data')
X, y = load_standard_hdf5_data(sys.argv[1])
outname = os.path.join(sys.argv[2], ntpath.basename(sys.argv[1]) + '.y_disance.hdf5')


def save_dataset(name, values, mode='a'):
    # Each matrix is written as soon as it is computed, so earlier results are
    # already on disk if a later step fails; `with` closes the file even when
    # the write raises.
    with h5py.File(outname, mode) as out:
        out.create_dataset(name, data=values)


print('One & One')
one_one = dot(y.T, y)
save_dataset('one_one', one_one, mode='w')  # 'w' starts a fresh output file

print('Zero & Zero')
zero_zero = dot(1 - y.T, 1 - y)
save_dataset('zero_zero', zero_zero)

print('Zero & One')
zero_one = dot(1 - y.T, y)
save_dataset('zero_one', zero_one)

print('One & Zero')
one_zero = dot(y.T, 1 - y)
save_dataset('one_zero', one_zero)

Session information

# Print the R version, platform, locale and attached/loaded package versions
# of the session that rendered this report.
sessionInfo()
R version 3.3.3 (2017-03-06)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X Yosemite 10.10.5

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] workflowr_0.4.0 plyr_1.8.4      rmarkdown_1.4   reshape2_1.4.2  h5_0.9.8        ggplot2_2.2.1  

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.10     rstudioapi_0.6   knitr_1.15.1     magrittr_1.5     munsell_0.4.3    colorspace_1.3-2 stringr_1.2.0    tools_3.3.3      gtable_0.2.0     git2r_0.18.0     htmltools_0.3.5  yaml_2.1.14      lazyeval_0.2.0   assertthat_0.1   digest_0.6.12    rprojroot_1.2    tibble_1.2       evaluate_0.10    labeling_0.3     stringi_1.1.3    scales_0.4.1     backports_1.0.5 

This R Markdown site was created with workflowr